set.seed(1)
project_2_data = 
  read_csv("project_2_data.csv", na = c("NA", "", ".")) |>
  janitor::clean_names()  |>
  mutate(status = ifelse(status == "Dead", 1, 0))
## Rows: 4024 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): Race, Marital Status, T Stage, N Stage, 6th Stage, differentiate, ...
## dbl  (5): Age, Tumor Size, Regional Node Examined, Reginol Node Positive, Su...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
dead <- project_2_data |>
  filter(status == 1)

alive <- project_2_data |>
  filter(status == 0) |>
  sample_n(nrow(dead))

project_2_numdata<-
  bind_rows(dead, alive) |>
  mutate(
    t_stage = case_when(
    t_stage == "T1" ~ 1,
    t_stage == "T2" ~ 2,
    t_stage == "T3" ~ 3,
    t_stage == "T4" ~ 4,
    TRUE ~ NA_real_),
    n_stage = case_when(
    n_stage == "N1" ~ 1,
    n_stage == "N2" ~ 2,
    n_stage == "N3" ~ 3,
    TRUE ~ NA_real_),
    differentiate = case_when(
    differentiate == "Well differentiated" ~ 1,
    differentiate == "Moderately differentiated" ~ 2,
    differentiate == "Poorly differentiated" ~ 3,
    differentiate == "Undifferentiated" ~ 4,
    TRUE ~ NA_real_),
    grade = case_when(
    grade == "anaplastic; Grade IV" ~ 4,
    grade == "3" ~ 3,
    grade == "2" ~ 2,
    grade == "1" ~ 1,
    TRUE ~ NA_real_),
    a_stage_regional = ifelse(a_stage == "Regional", 1, 0),
    estrogen_status = ifelse(estrogen_status == "Positive", 1, 0),
    progesterone_status = ifelse(progesterone_status == "Positive", 1, 0)
    ) |>
  select(-a_stage)
variables <- names(project_2_numdata)[!names(project_2_numdata) %in% c("survival_months", "status")]
for (var in variables) {
  formula <- as.formula(paste("Surv(survival_months, status) ~", var))
  model <- coxph(formula, data = project_2_numdata)
  print(var)
  print(summary(model))
}
## [1] "age"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 1232, number of events= 616 
## 
##         coef exp(coef) se(coef)     z Pr(>|z|)   
## age 0.013313  1.013402 0.004454 2.989   0.0028 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##     exp(coef) exp(-coef) lower .95 upper .95
## age     1.013     0.9868     1.005     1.022
## 
## Concordance= 0.536  (se = 0.012 )
## Likelihood ratio test= 9.05  on 1 df,   p=0.003
## Wald test            = 8.93  on 1 df,   p=0.003
## Score (logrank) test = 8.95  on 1 df,   p=0.003
## 
## [1] "race"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 1232, number of events= 616 
## 
##              coef exp(coef) se(coef)      z Pr(>|z|)   
## raceOther -0.5965    0.5508   0.2099 -2.842  0.00448 **
## raceWhite -0.3263    0.7216   0.1252 -2.607  0.00914 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##           exp(coef) exp(-coef) lower .95 upper .95
## raceOther    0.5508      1.816    0.3650    0.8310
## raceWhite    0.7216      1.386    0.5646    0.9222
## 
## Concordance= 0.527  (se = 0.008 )
## Likelihood ratio test= 9.46  on 2 df,   p=0.009
## Wald test            = 9.81  on 2 df,   p=0.007
## Score (logrank) test = 9.92  on 2 df,   p=0.007
## 
## [1] "marital_status"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 1232, number of events= 616 
## 
##                             coef exp(coef) se(coef)      z Pr(>|z|)  
## marital_statusMarried   -0.23876   0.78761  0.11794 -2.024   0.0429 *
## marital_statusSeparated  0.53873   1.71383  0.27908  1.930   0.0536 .
## marital_statusSingle    -0.10841   0.89726  0.14404 -0.753   0.4517  
## marital_statusWidowed    0.04877   1.04998  0.17757  0.275   0.7836  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                         exp(coef) exp(-coef) lower .95 upper .95
## marital_statusMarried      0.7876     1.2697    0.6251    0.9924
## marital_statusSeparated    1.7138     0.5835    0.9918    2.9616
## marital_statusSingle       0.8973     1.1145    0.6766    1.1899
## marital_statusWidowed      1.0500     0.9524    0.7414    1.4871
## 
## Concordance= 0.537  (se = 0.011 )
## Likelihood ratio test= 12.55  on 4 df,   p=0.01
## Wald test            = 14  on 4 df,   p=0.007
## Score (logrank) test = 14.36  on 4 df,   p=0.006
## 
## [1] "t_stage"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 1232, number of events= 616 
## 
##            coef exp(coef) se(coef)     z Pr(>|z|)    
## t_stage 0.38440   1.46874  0.04713 8.156 3.48e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## t_stage     1.469     0.6809     1.339     1.611
## 
## Concordance= 0.582  (se = 0.011 )
## Likelihood ratio test= 62.94  on 1 df,   p=2e-15
## Wald test            = 66.51  on 1 df,   p=3e-16
## Score (logrank) test = 67.36  on 1 df,   p=2e-16
## 
## [1] "n_stage"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 1232, number of events= 616 
## 
##            coef exp(coef) se(coef)    z Pr(>|z|)    
## n_stage 0.61870   1.85652  0.04835 12.8   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## n_stage     1.857     0.5386     1.689     2.041
## 
## Concordance= 0.623  (se = 0.011 )
## Likelihood ratio test= 152  on 1 df,   p=<2e-16
## Wald test            = 163.7  on 1 df,   p=<2e-16
## Score (logrank) test = 175.6  on 1 df,   p=<2e-16
## 
## [1] "x6th_stage"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 1232, number of events= 616 
## 
##                  coef exp(coef) se(coef)      z Pr(>|z|)    
## x6th_stageIIB  0.4390    1.5512   0.1336  3.287  0.00101 ** 
## x6th_stageIIIA 0.7669    2.1530   0.1260  6.088 1.15e-09 ***
## x6th_stageIIIB 1.4792    4.3893   0.2464  6.003 1.94e-09 ***
## x6th_stageIIIC 1.5199    4.5718   0.1274 11.934  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                exp(coef) exp(-coef) lower .95 upper .95
## x6th_stageIIB      1.551     0.6447     1.194     2.015
## x6th_stageIIIA     2.153     0.4645     1.682     2.756
## x6th_stageIIIB     4.389     0.2278     2.708     7.115
## x6th_stageIIIC     4.572     0.2187     3.562     5.868
## 
## Concordance= 0.637  (se = 0.012 )
## Likelihood ratio test= 171.5  on 4 df,   p=<2e-16
## Wald test            = 177.7  on 4 df,   p=<2e-16
## Score (logrank) test = 198.8  on 4 df,   p=<2e-16
## 
## [1] "differentiate"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 1232, number of events= 616 
## 
##                  coef exp(coef) se(coef)     z Pr(>|z|)    
## differentiate 0.44124   1.55463  0.06433 6.859 6.94e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##               exp(coef) exp(-coef) lower .95 upper .95
## differentiate     1.555     0.6432      1.37     1.764
## 
## Concordance= 0.574  (se = 0.011 )
## Likelihood ratio test= 48.05  on 1 df,   p=4e-12
## Wald test            = 47.04  on 1 df,   p=7e-12
## Score (logrank) test = 47.37  on 1 df,   p=6e-12
## 
## [1] "grade"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 1232, number of events= 616 
## 
##          coef exp(coef) se(coef)     z Pr(>|z|)    
## grade 0.44124   1.55463  0.06433 6.859 6.94e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##       exp(coef) exp(-coef) lower .95 upper .95
## grade     1.555     0.6432      1.37     1.764
## 
## Concordance= 0.574  (se = 0.011 )
## Likelihood ratio test= 48.05  on 1 df,   p=4e-12
## Wald test            = 47.04  on 1 df,   p=7e-12
## Score (logrank) test = 47.37  on 1 df,   p=6e-12
## 
## [1] "tumor_size"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 1232, number of events= 616 
## 
##                coef exp(coef) se(coef)    z Pr(>|z|)    
## tumor_size 0.011933  1.012005 0.001574 7.58 3.46e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##            exp(coef) exp(-coef) lower .95 upper .95
## tumor_size     1.012     0.9881     1.009     1.015
## 
## Concordance= 0.592  (se = 0.012 )
## Likelihood ratio test= 50.4  on 1 df,   p=1e-12
## Wald test            = 57.45  on 1 df,   p=3e-14
## Score (logrank) test = 58.25  on 1 df,   p=2e-14
## 
## [1] "estrogen_status"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 1232, number of events= 616 
## 
##                    coef exp(coef) se(coef)      z Pr(>|z|)    
## estrogen_status -0.9108    0.4022   0.1064 -8.562   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## estrogen_status    0.4022      2.486    0.3265    0.4954
## 
## Concordance= 0.565  (se = 0.008 )
## Likelihood ratio test= 60.02  on 1 df,   p=9e-15
## Wald test            = 73.31  on 1 df,   p=<2e-16
## Score (logrank) test = 78.48  on 1 df,   p=<2e-16
## 
## [1] "progesterone_status"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 1232, number of events= 616 
## 
##                         coef exp(coef) se(coef)      z Pr(>|z|)    
## progesterone_status -0.71314   0.49010  0.08583 -8.308   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                     exp(coef) exp(-coef) lower .95 upper .95
## progesterone_status    0.4901       2.04    0.4142    0.5799
## 
## Concordance= 0.588  (se = 0.01 )
## Likelihood ratio test= 62.82  on 1 df,   p=2e-15
## Wald test            = 69.03  on 1 df,   p=<2e-16
## Score (logrank) test = 71.97  on 1 df,   p=<2e-16
## 
## [1] "regional_node_examined"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 1232, number of events= 616 
## 
##                            coef exp(coef) se(coef)     z Pr(>|z|)  
## regional_node_examined 0.010506  1.010561 0.004982 2.109    0.035 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                        exp(coef) exp(-coef) lower .95 upper .95
## regional_node_examined     1.011     0.9895     1.001      1.02
## 
## Concordance= 0.521  (se = 0.013 )
## Likelihood ratio test= 4.35  on 1 df,   p=0.04
## Wald test            = 4.45  on 1 df,   p=0.03
## Score (logrank) test = 4.44  on 1 df,   p=0.04
## 
## [1] "reginol_node_positive"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 1232, number of events= 616 
## 
##                           coef exp(coef) se(coef)     z Pr(>|z|)    
## reginol_node_positive 0.054926  1.056462 0.004627 11.87   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                       exp(coef) exp(-coef) lower .95 upper .95
## reginol_node_positive     1.056     0.9466     1.047     1.066
## 
## Concordance= 0.627  (se = 0.012 )
## Likelihood ratio test= 106.3  on 1 df,   p=<2e-16
## Wald test            = 140.9  on 1 df,   p=<2e-16
## Score (logrank) test = 148  on 1 df,   p=<2e-16
## 
## [1] "a_stage_regional"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 1232, number of events= 616 
## 
##                     coef exp(coef) se(coef)      z Pr(>|z|)    
## a_stage_regional -1.1046    0.3314   0.1747 -6.324 2.55e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                  exp(coef) exp(-coef) lower .95 upper .95
## a_stage_regional    0.3314      3.018    0.2353    0.4666
## 
## Concordance= 0.522  (se = 0.005 )
## Likelihood ratio test= 29.49  on 1 df,   p=6e-08
## Wald test            = 39.99  on 1 df,   p=3e-10
## Score (logrank) test = 44.22  on 1 df,   p=3e-11

All variables are statistically significant except for marriage in “marital_status”

project_2_numdata <-
  project_2_numdata|>
  mutate(Married = ifelse(marital_status == "Married", 1, 0)) |>
  select(-differentiate, - marital_status)

cor_matrix <- cor(project_2_numdata[, sapply(project_2_numdata, is.numeric)])
print(cor_matrix)
##                                age      t_stage     n_stage       grade
## age                     1.00000000 -0.050113404 -0.01706295 -0.12321448
## t_stage                -0.05011340  1.000000000  0.30612098  0.15076838
## n_stage                -0.01706295  0.306120984  1.00000000  0.17991601
## grade                  -0.12321448  0.150768380  0.17991601  1.00000000
## tumor_size             -0.07953275  0.784997546  0.30277440  0.14403058
## estrogen_status         0.12111658 -0.106722673 -0.12700846 -0.26247316
## progesterone_status     0.01233580 -0.071974613 -0.11108594 -0.21201525
## regional_node_examined -0.03141638  0.088442919  0.36808270  0.07651544
## reginol_node_positive   0.01628318  0.244266968  0.82874594  0.15163577
## survival_months        -0.08599693 -0.159716020 -0.24737291 -0.15052637
## status                  0.06976636  0.225250913  0.34556514  0.19151006
## a_stage_regional        0.04777171 -0.234675561 -0.25575034 -0.04874199
## Married                -0.04796418  0.003435462 -0.05151826 -0.02717180
##                         tumor_size estrogen_status progesterone_status
## age                    -0.07953275     0.121116575          0.01233580
## t_stage                 0.78499755    -0.106722673         -0.07197461
## n_stage                 0.30277440    -0.127008462         -0.11108594
## grade                   0.14403058    -0.262473159         -0.21201525
## tumor_size              1.00000000    -0.123020668         -0.08438705
## estrogen_status        -0.12302067     1.000000000          0.58504596
## progesterone_status    -0.08438705     0.585045962          1.00000000
## regional_node_examined  0.11473470    -0.041404301         -0.01390137
## reginol_node_positive   0.24878553    -0.100873585         -0.07105526
## survival_months        -0.16151178     0.227274035          0.21200308
## status                  0.19535827    -0.175795073         -0.20001424
## a_stage_regional       -0.12796649     0.127990099          0.07311096
## Married                -0.01036450     0.008579762         -0.01351800
##                        regional_node_examined reginol_node_positive
## age                               -0.03141638            0.01628318
## t_stage                            0.08844292            0.24426697
## n_stage                            0.36808270            0.82874594
## grade                              0.07651544            0.15163577
## tumor_size                         0.11473470            0.24878553
## estrogen_status                   -0.04140430           -0.10087358
## progesterone_status               -0.01390137           -0.07105526
## regional_node_examined             1.00000000            0.49395643
## reginol_node_positive              0.49395643            1.00000000
## survival_months                   -0.03087185           -0.21263747
## status                             0.06199862            0.31134369
## a_stage_regional                  -0.04237809           -0.19505294
## Married                            0.02581466           -0.03581806
##                        survival_months      status a_stage_regional
## age                        -0.08599693  0.06976636       0.04777171
## t_stage                    -0.15971602  0.22525091      -0.23467556
## n_stage                    -0.24737291  0.34556514      -0.25575034
## grade                      -0.15052637  0.19151006      -0.04874199
## tumor_size                 -0.16151178  0.19535827      -0.12796649
## estrogen_status             0.22727403 -0.17579507       0.12799010
## progesterone_status         0.21200308 -0.20001424       0.07311096
## regional_node_examined     -0.03087185  0.06199862      -0.04237809
## reginol_node_positive      -0.21263747  0.31134369      -0.19505294
## survival_months             1.00000000 -0.58302039       0.13738966
## status                     -0.58302039  1.00000000      -0.13123516
## a_stage_regional            0.13738966 -0.13123516       1.00000000
## Married                     0.06161119 -0.08197981       0.03226076
##                             Married
## age                    -0.047964181
## t_stage                 0.003435462
## n_stage                -0.051518259
## grade                  -0.027171798
## tumor_size             -0.010364496
## estrogen_status         0.008579762
## progesterone_status    -0.013518004
## regional_node_examined  0.025814656
## reginol_node_positive  -0.035818059
## survival_months         0.061611186
## status                 -0.081979813
## a_stage_regional        0.032260758
## Married                 1.000000000
cortable<-project_2_numdata|>
  select(-race,-x6th_stage)
cortable$regional_node_examined
##    [1]  9  9 12 15 16 23 24  8  5 21  4  3 33  7 11  3 16  8 28 20 14 23 20 13
##   [25] 15 14 20 17 11 23  3 34  1 17 17  4 12 27 14 20 28 29 18  3  4 13 23 18
##   [49]  7 41 11 10 27 19  5 24  2 12 16 15 40  9 10 19 17  3 19 16  7 25  1  9
##   [73] 10 15  9 11 29 21 17 17  7 15 22 47 22 14  9  9 13 14 10 19 15  6  2 54
##   [97]  5 31 28 21 19  8 13 13  9 18 12 15 27 24  5  7 22 17 16 12  9 23 10  6
##  [121] 19  5 15 10 30 18 15 27 14  3 36 16  5  9  3 32 31 21 18 10  3 20 10 25
##  [145]  9 14  9 29 32 19 27  1 11 13 14 21 11 15 16 22  2 14 17  5 10 23 10  3
##  [169] 17 18 18 23  9  9  8 23 19 12  9 26 12 15 23  9 14 22 16 30  4  2 10 13
##  [193]  1 10  4 15  7  5 28 12 18 28 18 12  6 18 26  3 21 36 26 18 18 28 16 30
##  [217]  2  1 21 14 21  4 10 16 19 15 29 15 34 23 27 17 13 12  3 13 14 36 11 12
##  [241]  9  9  5 13 11 20 10 14 16 47 15 16 29 16  9  7 11  6 16 18 22 18  4 25
##  [265] 29  1 13 13  9 21 25 10  2  8 21  2 26 17  4  9 13 16 10 13  1  6  9  9
##  [289] 18 22  8 15  8  4  4 18 12 17 15 19 16 27 15 10 14 13 12 11  8 14 21 13
##  [313] 17  4 19 12 17  7 25 37  8 24 13 22 15  3 13  3  2 19 12 11 11  2 20 12
##  [337] 19 11 15 12 21 10 10 17  3 15 17 15 17 11  8 18 21 18 10 29  9 18 13 20
##  [361] 35 23 12  7 32 18 18  5 28 10 10 18 11  9  9  1 30  3  7  9 19 57  5 22
##  [385] 12 15 12  3  8 15 25 10  4 26 25 16 23 13 17  8 24  9 10  9 36 19 12  8
##  [409] 21  8 10 30 17 11 12 20 22 14  4 11  1  4 18 20 12 19 15 21 16 18 11 16
##  [433] 17  5 16 23  7  5 19  9 22 22  8 10 19 11 11  7 12  8 13 12 24 12 16 16
##  [457] 20 13  3 30 22 35 11 10  7 30 29 14 12 17  2 24 15 10  9 12  4 19 23 28
##  [481] 17 14 27 12 16 11 19 22 16  8  9 10 20 15  4 24 19 18 12  4  9 17 22  2
##  [505] 11  6  2 16 21 20 12 17 21  7 15 13  6  4  9 30  2 29 13  3  8  5 14  6
##  [529] 15  1 14 11 21 34 11 18 13  7 11 11  4 16 19 20 12 13 30 19 17 12 12 15
##  [553] 12  9 12  6  8  2 12 28 14 24 24  4 26  7 18  8 18 13 22  9 28 21 10 13
##  [577]  6 16 12  2 15 16 17 17 23 25 47 21 19 18 18  3 31 11  2 16 14 16 23 13
##  [601] 14  7 10  7 16 25 28 10 16 22 23 21 21 19  6  2  8  6 12  2  5 14 15  8
##  [625]  3 23 19 19 15  7  7 14 10 10 14 31 17 25  7 12 15 12  4 17 19 24 14 24
##  [649]  6 25  6 19 34 16  3  7 15 19  2 26  9 21  2  9 13 10 17 13  4 15  5 13
##  [673] 21  5 18  6 21 11 18 11  7  4  5  5 13 18 17  5  7 14 23 11 15 13 13 10
##  [697] 43 47 20 14  9 26  7 12 28  1  5 18 18 12 11 20 41 23 10 20 16 21 26 12
##  [721]  6 17 12 11 20 22 26 10  8 15 11  4 16 13 21 14 17  9 10  8  3 12 22 37
##  [745] 18  7 11 23 13 25 12 17 29  1 12  7  1 23  4 10 25 13  1  9  9 11 26  9
##  [769] 18  3  8 24  9 14 24 18  2 15 24  9 23 27 16 15 18 16 17 25 15 15 18 15
##  [793] 12  2 13 20  9 13 16 15 15  9 23 13  2 20 19 29 22  2 15  9 23  7 12  3
##  [817] 13  1 31 15 20 12 18 10 15  7 23 24 13  8 12 13 20  7 22  4 11 14 16 17
##  [841] 10 13 11 19 17 15 25 15 19  9 20 16 14  6 14 11 13 16  7 17 16 13  5 35
##  [865]  1 20 20  5  5 26 15 15 16 20 13 14  7 15 27 22 30  4 34  1 20 14 18 10
##  [889] 16  8 29  4  9  8  3 10 25 18 20  9 17 28 15 20  3 18  9 21 14  7  1 12
##  [913] 15  6 17 35 13 25 18  8 16 26  5  8  9 25 27 13  3 16 29  6 14  3 17 16
##  [937] 17 32  8 16  3  2 16 20 24 13 19 19 11  3 14 12  8  6  1 23  7  2  4 13
##  [961] 12  6 37 15 17  2 14 11  3 12 27  6 19  4 19 12 14 18 15 12 20 10 21  8
##  [985] 17 17  8 20 15 10  6 21  6  8 17 24  7 13 14 27 15 23 15 11  1 24 13 26
## [1009] 16 18  5 16 28 19 12 14 14 23 17 18 31 12 11  9 20 16 18 18  2 13  4 14
## [1033] 12 13  4  3  2  8 11  3  2  2  2 13 10  1 16 12 18  5 28 16 13 15  7 22
## [1057] 17 11 10 16 14  9  8 16 15  6 18 14 15 14  3 22 38  3  5 17  9 23 14 13
## [1081]  9 24  9  9  6 16 18 17 10 24  8 27  7 22 10 12 28 14  9  7 17 26 11 23
## [1105]  9 11 12 32 32  6 13 16  8 19 37  3 18 14 10 21 13 12 15  9  6  7  9 13
## [1129] 31 13 18  4  6 18 12  3 16 11  3 29 26 14  9  8 29 18 27 11 20 19  9 16
## [1153] 13  3  9  2 12 10  7 12 25 26 18 14 17 18  9  9 12 13 14 18 30 14  6 15
## [1177]  2 19 16 14  9 10  9  5  7 11  4 14 10  4  9 12 12  3 16 16 17 24  9 11
## [1201] 16 19 11 14  3  4  5  5 17 12 16 22 11 10  8 18 21 10  4 25  3 19 14 16
## [1225] 15 16 16 11 18 13 11 12
chart.Correlation(cortable, histogram=TRUE, pch=19)
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter

We have to choice 1 between t_stage - tumor size n stage - regional_node_possitive

muldata = project_2_numdata
  
  
cox_model <- coxph(Surv(survival_months, status) ~.,
                   data = muldata)

vifs <- vif(cox_model)  
## Warning in vif.default(cox_model): No intercept: vifs may not be sensible.
print(vifs)
##                             GVIF Df GVIF^(1/(2*Df))
## age                     1.078511  1        1.038514
## race                    1.089620  2        1.021689
## t_stage                 4.204881  1        2.050581
## n_stage                16.052408  1        4.006546
## x6th_stage             42.723885  4        1.598946
## grade                   1.132422  1        1.064153
## tumor_size              2.891806  1        1.700531
## estrogen_status         1.705080  1        1.305787
## progesterone_status     1.579316  1        1.256708
## regional_node_examined  1.766667  1        1.329160
## reginol_node_positive   4.281220  1        2.069111
## a_stage_regional        1.337110  1        1.156335
## Married                 1.094899  1        1.046374

remove t-stage,n_stage and regional_mode_positive due to higher VIF score

final = muldata |>
  select(-t_stage, -n_stage,-reginol_node_positive)
  
cox_model1 <- coxph(Surv(survival_months, status) ~.,
                   data = final)
vifs <- vif(cox_model1)
## Warning in vif.default(cox_model1): No intercept: vifs may not be sensible.
print(vifs)
##                            GVIF Df GVIF^(1/(2*Df))
## age                    1.065375  1        1.032170
## race                   1.084396  2        1.020462
## x6th_stage             2.150931  4        1.100470
## grade                  1.123660  1        1.060028
## tumor_size             1.298750  1        1.139627
## estrogen_status        1.647493  1        1.283547
## progesterone_status    1.541107  1        1.241413
## regional_node_examined 1.247171  1        1.116768
## a_stage_regional       1.327119  1        1.152007
## Married                1.093934  1        1.045913
ph_test <- cox.zph(cox_model1)
print(ph_test)
##                          chisq df       p
## age                     0.1978  1    0.66
## race                    0.9493  2    0.62
## x6th_stage              7.3617  4    0.12
## grade                   0.1293  1    0.72
## tumor_size              0.0858  1    0.77
## estrogen_status        23.1261  1 1.5e-06
## progesterone_status    19.6293  1 9.4e-06
## regional_node_examined  0.0880  1    0.77
## a_stage_regional        0.6691  1    0.41
## Married                 2.3184  1    0.13
## GLOBAL                 47.1764 14 1.8e-05
plot(ph_test) 

The Cox model assumes that hazard ratios are constant over time. A non-significant p-value (p > 0.05) indicates that the PH assumption holds. As GLOBAL 47.176 15 1.8e-05, the model did not meet the assumption, as same as estrogen_status and progesterone_status.We need further improve our model.

cox_model2 <- coxph(Surv(survival_months, status) ~ age + race + x6th_stage + grade + 
                            tumor_size + regional_node_examined + Married,
                            data = final,x = TRUE)
summary(cox_model2)
## Call:
## coxph(formula = Surv(survival_months, status) ~ age + race + 
##     x6th_stage + grade + tumor_size + regional_node_examined + 
##     Married, data = final, x = TRUE)
## 
##   n= 1232, number of events= 616 
## 
##                             coef exp(coef)  se(coef)      z Pr(>|z|)    
## age                     0.018010  1.018174  0.004577  3.935 8.32e-05 ***
## raceOther              -0.448268  0.638733  0.213151 -2.103  0.03546 *  
## raceWhite              -0.316702  0.728548  0.128875 -2.457  0.01399 *  
## x6th_stageIIB           0.410790  1.508008  0.137843  2.980  0.00288 ** 
## x6th_stageIIIA          0.720050  2.054536  0.139497  5.162 2.45e-07 ***
## x6th_stageIIIB          1.364084  3.912139  0.253810  5.374 7.68e-08 ***
## x6th_stageIIIC          1.496444  4.465782  0.152449  9.816  < 2e-16 ***
## grade                   0.369090  1.446418  0.066157  5.579 2.42e-08 ***
## tumor_size              0.002424  1.002427  0.001884  1.287  0.19808    
## regional_node_examined -0.014599  0.985508  0.005693 -2.564  0.01034 *  
## Married                -0.137910  0.871177  0.084599 -1.630  0.10307    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                        exp(coef) exp(-coef) lower .95 upper .95
## age                       1.0182     0.9822    1.0091    1.0273
## raceOther                 0.6387     1.5656    0.4206    0.9700
## raceWhite                 0.7285     1.3726    0.5659    0.9379
## x6th_stageIIB             1.5080     0.6631    1.1510    1.9758
## x6th_stageIIIA            2.0545     0.4867    1.5631    2.7006
## x6th_stageIIIB            3.9121     0.2556    2.3789    6.4337
## x6th_stageIIIC            4.4658     0.2239    3.3123    6.0209
## grade                     1.4464     0.6914    1.2705    1.6467
## tumor_size                1.0024     0.9976    0.9987    1.0061
## regional_node_examined    0.9855     1.0147    0.9746    0.9966
## Married                   0.8712     1.1479    0.7381    1.0283
## 
## Concordance= 0.669  (se = 0.012 )
## Likelihood ratio test= 237.1  on 11 df,   p=<2e-16
## Wald test            = 244  on 11 df,   p=<2e-16
## Score (logrank) test = 265.9  on 11 df,   p=<2e-16
ph_test <- cox.zph(cox_model2)
print(ph_test)
##                           chisq df     p
## age                     0.33833  1 0.561
## race                    0.84145  2 0.657
## x6th_stage              5.93099  4 0.204
## grade                   0.30431  1 0.581
## tumor_size              0.00191  1 0.965
## regional_node_examined  0.01952  1 0.889
## Married                 2.98963  1 0.084
## GLOBAL                 10.89580 11 0.452
plot(ph_test) 

now the assumption is been fully achieved

cox_model3 <- coxph(Surv(survival_months, status) ~ age + race + x6th_stage + grade + 
                            log2(tumor_size+1) + log2(regional_node_examined+1) + Married,
                            data = final)
summary(cox_model3)
## Call:
## coxph(formula = Surv(survival_months, status) ~ age + race + 
##     x6th_stage + grade + log2(tumor_size + 1) + log2(regional_node_examined + 
##     1) + Married, data = final)
## 
##   n= 1232, number of events= 616 
## 
##                                       coef exp(coef)  se(coef)      z Pr(>|z|)
## age                               0.017897  1.018058  0.004587  3.902 9.56e-05
## raceOther                        -0.469027  0.625611  0.212988 -2.202 0.027656
## raceWhite                        -0.325858  0.721907  0.128703 -2.532 0.011345
## x6th_stageIIB                     0.366885  1.443232  0.146188  2.510 0.012084
## x6th_stageIIIA                    0.701186  2.016143  0.149147  4.701 2.59e-06
## x6th_stageIIIB                    1.323244  3.755587  0.257297  5.143 2.71e-07
## x6th_stageIIIC                    1.486605  4.422059  0.159364  9.328  < 2e-16
## grade                             0.364918  1.440396  0.066055  5.524 3.30e-08
## log2(tumor_size + 1)              0.091348  1.095650  0.052305  1.746 0.080735
## log2(regional_node_examined + 1) -0.168514  0.844919  0.051196 -3.292 0.000996
## Married                          -0.133509  0.875019  0.084670 -1.577 0.114837
##                                     
## age                              ***
## raceOther                        *  
## raceWhite                        *  
## x6th_stageIIB                    *  
## x6th_stageIIIA                   ***
## x6th_stageIIIB                   ***
## x6th_stageIIIC                   ***
## grade                            ***
## log2(tumor_size + 1)             .  
## log2(regional_node_examined + 1) ***
## Married                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                  exp(coef) exp(-coef) lower .95 upper .95
## age                                 1.0181     0.9823    1.0089    1.0273
## raceOther                           0.6256     1.5984    0.4121    0.9497
## raceWhite                           0.7219     1.3852    0.5610    0.9290
## x6th_stageIIB                       1.4432     0.6929    1.0837    1.9221
## x6th_stageIIIA                      2.0161     0.4960    1.5051    2.7007
## x6th_stageIIIB                      3.7556     0.2663    2.2681    6.2186
## x6th_stageIIIC                      4.4221     0.2261    3.2357    6.0433
## grade                               1.4404     0.6943    1.2655    1.6395
## log2(tumor_size + 1)                1.0956     0.9127    0.9889    1.2139
## log2(regional_node_examined + 1)    0.8449     1.1835    0.7643    0.9341
## Married                             0.8750     1.1428    0.7412    1.0330
## 
## Concordance= 0.672  (se = 0.012 )
## Likelihood ratio test= 242.2  on 11 df,   p=<2e-16
## Wald test            = 247.1  on 11 df,   p=<2e-16
## Score (logrank) test = 269.2  on 11 df,   p=<2e-16

Add log transformation to the tumor size and regional node_examined, now they are being statistically significant

cox_model4 <- coxph(Surv(survival_months, status) ~ age + race + x6th_stage + grade + 
                            log2(tumor_size+1) + log2(regional_node_examined+1) + Married +                                  Married*log2(regional_node_examined+1),
                            data = final)
summary(cox_model4)
## Call:
## coxph(formula = Surv(survival_months, status) ~ age + race + 
##     x6th_stage + grade + log2(tumor_size + 1) + log2(regional_node_examined + 
##     1) + Married + Married * log2(regional_node_examined + 1), 
##     data = final)
## 
##   n= 1232, number of events= 616 
## 
##                                               coef exp(coef)  se(coef)      z
## age                                       0.018072  1.018237  0.004598  3.931
## raceOther                                -0.482448  0.617270  0.213173 -2.263
## raceWhite                                -0.337154  0.713799  0.128945 -2.615
## x6th_stageIIB                             0.363303  1.438072  0.146187  2.485
## x6th_stageIIIA                            0.700234  2.014224  0.149208  4.693
## x6th_stageIIIB                            1.355960  3.880486  0.258443  5.247
## x6th_stageIIIC                            1.488566  4.430738  0.159240  9.348
## grade                                     0.367548  1.444189  0.066096  5.561
## log2(tumor_size + 1)                      0.091715  1.096053  0.052194  1.757
## log2(regional_node_examined + 1)         -0.254087  0.775624  0.073140 -3.474
## Married                                  -0.687701  0.502731  0.356035 -1.932
## log2(regional_node_examined + 1):Married  0.147764  1.159240  0.092403  1.599
##                                          Pr(>|z|)    
## age                                      8.47e-05 ***
## raceOther                                0.023625 *  
## raceWhite                                0.008930 ** 
## x6th_stageIIB                            0.012948 *  
## x6th_stageIIIA                           2.69e-06 ***
## x6th_stageIIIB                           1.55e-07 ***
## x6th_stageIIIC                            < 2e-16 ***
## grade                                    2.68e-08 ***
## log2(tumor_size + 1)                     0.078884 .  
## log2(regional_node_examined + 1)         0.000513 ***
## Married                                  0.053414 .  
## log2(regional_node_examined + 1):Married 0.109791    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                          exp(coef) exp(-coef) lower .95
## age                                         1.0182     0.9821    1.0091
## raceOther                                   0.6173     1.6200    0.4065
## raceWhite                                   0.7138     1.4010    0.5544
## x6th_stageIIB                               1.4381     0.6954    1.0798
## x6th_stageIIIA                              2.0142     0.4965    1.5035
## x6th_stageIIIB                              3.8805     0.2577    2.3383
## x6th_stageIIIC                              4.4307     0.2257    3.2429
## grade                                       1.4442     0.6924    1.2687
## log2(tumor_size + 1)                        1.0961     0.9124    0.9895
## log2(regional_node_examined + 1)            0.7756     1.2893    0.6720
## Married                                     0.5027     1.9891    0.2502
## log2(regional_node_examined + 1):Married    1.1592     0.8626    0.9672
##                                          upper .95
## age                                         1.0275
## raceOther                                   0.9374
## raceWhite                                   0.9190
## x6th_stageIIB                               1.9152
## x6th_stageIIIA                              2.6984
## x6th_stageIIIB                              6.4398
## x6th_stageIIIC                              6.0537
## grade                                       1.6439
## log2(tumor_size + 1)                        1.2141
## log2(regional_node_examined + 1)            0.8952
## Married                                     1.0102
## log2(regional_node_examined + 1):Married    1.3894
## 
## Concordance= 0.673  (se = 0.012 )
## Likelihood ratio test= 244.8  on 12 df,   p=<2e-16
## Wald test            = 248.3  on 12 df,   p=<2e-16
## Score (logrank) test = 271.1  on 12 df,   p=<2e-16
cox_model5 <- coxph(Surv(survival_months, status) ~ age + race + x6th_stage + grade + 
                            log2(tumor_size+1) + log2(regional_node_examined+1),
                            data = final)
summary(cox_model5)
## Call:
## coxph(formula = Surv(survival_months, status) ~ age + race + 
##     x6th_stage + grade + log2(tumor_size + 1) + log2(regional_node_examined + 
##     1), data = final)
## 
##   n= 1232, number of events= 616 
## 
##                                       coef exp(coef)  se(coef)      z Pr(>|z|)
## age                               0.018557  1.018731  0.004578  4.054 5.04e-05
## raceOther                        -0.516231  0.596765  0.210954 -2.447 0.014400
## raceWhite                        -0.368703  0.691631  0.125884 -2.929 0.003401
## x6th_stageIIB                     0.353730  1.424370  0.146028  2.422 0.015420
## x6th_stageIIIA                    0.685477  1.984718  0.149028  4.600 4.23e-06
## x6th_stageIIIB                    1.292146  3.640592  0.256483  5.038 4.71e-07
## x6th_stageIIIC                    1.483916  4.410184  0.159225  9.320  < 2e-16
## grade                             0.368485  1.445543  0.066109  5.574 2.49e-08
## log2(tumor_size + 1)              0.097260  1.102147  0.052088  1.867 0.061869
## log2(regional_node_examined + 1) -0.168850  0.844636  0.051210 -3.297 0.000977
##                                     
## age                              ***
## raceOther                        *  
## raceWhite                        ** 
## x6th_stageIIB                    *  
## x6th_stageIIIA                   ***
## x6th_stageIIIB                   ***
## x6th_stageIIIC                   ***
## grade                            ***
## log2(tumor_size + 1)             .  
## log2(regional_node_examined + 1) ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                  exp(coef) exp(-coef) lower .95 upper .95
## age                                 1.0187     0.9816    1.0096    1.0279
## raceOther                           0.5968     1.6757    0.3947    0.9023
## raceWhite                           0.6916     1.4459    0.5404    0.8852
## x6th_stageIIB                       1.4244     0.7021    1.0699    1.8964
## x6th_stageIIIA                      1.9847     0.5038    1.4820    2.6580
## x6th_stageIIIB                      3.6406     0.2747    2.2022    6.0185
## x6th_stageIIIC                      4.4102     0.2267    3.2279    6.0254
## grade                               1.4455     0.6918    1.2699    1.6455
## log2(tumor_size + 1)                1.1021     0.9073    0.9952    1.2206
## log2(regional_node_examined + 1)    0.8446     1.1839    0.7640    0.9338
## 
## Concordance= 0.669  (se = 0.012 )
## Likelihood ratio test= 239.8  on 10 df,   p=<2e-16
## Wald test            = 243.7  on 10 df,   p=<2e-16
## Score (logrank) test = 266  on 10 df,   p=<2e-16
cox_model6 <- coxph(Surv(survival_months, status) ~ age + race + x6th_stage + grade + 
                            log2(regional_node_examined+1),
                            data = final)
summary(cox_model6)
## Call:
## coxph(formula = Surv(survival_months, status) ~ age + race + 
##     x6th_stage + grade + log2(regional_node_examined + 1), data = final)
## 
##   n= 1232, number of events= 616 
## 
##                                       coef exp(coef)  se(coef)      z Pr(>|z|)
## age                               0.017705  1.017863  0.004567  3.877 0.000106
## raceOther                        -0.513619  0.598326  0.210909 -2.435 0.014881
## raceWhite                        -0.367476  0.692480  0.125910 -2.919 0.003517
## x6th_stageIIB                     0.461184  1.585950  0.134452  3.430 0.000603
## x6th_stageIIIA                    0.821688  2.274336  0.129512  6.345 2.23e-10
## x6th_stageIIIB                    1.413804  4.111567  0.247599  5.710 1.13e-08
## x6th_stageIIIC                    1.622178  5.064110  0.140484 11.547  < 2e-16
## grade                             0.374943  1.454908  0.066050  5.677 1.37e-08
## log2(regional_node_examined + 1) -0.173489  0.840726  0.051254 -3.385 0.000712
##                                     
## age                              ***
## raceOther                        *  
## raceWhite                        ** 
## x6th_stageIIB                    ***
## x6th_stageIIIA                   ***
## x6th_stageIIIB                   ***
## x6th_stageIIIC                   ***
## grade                            ***
## log2(regional_node_examined + 1) ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                  exp(coef) exp(-coef) lower .95 upper .95
## age                                 1.0179     0.9825    1.0088    1.0270
## raceOther                           0.5983     1.6713    0.3957    0.9046
## raceWhite                           0.6925     1.4441    0.5410    0.8863
## x6th_stageIIB                       1.5859     0.6305    1.2186    2.0641
## x6th_stageIIIA                      2.2743     0.4397    1.7645    2.9315
## x6th_stageIIIB                      4.1116     0.2432    2.5308    6.6798
## x6th_stageIIIC                      5.0641     0.1975    3.8452    6.6693
## grade                               1.4549     0.6873    1.2782    1.6560
## log2(regional_node_examined + 1)    0.8407     1.1894    0.7604    0.9296
## 
## Concordance= 0.668  (se = 0.012 )
## Likelihood ratio test= 236.2  on 9 df,   p=<2e-16
## Wald test            = 238.6  on 9 df,   p=<2e-16
## Score (logrank) test = 261  on 9 df,   p=<2e-16

Model Evaluation

summary(final$survival_months)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00   43.00   62.00   61.24   82.00  107.00
library(timeROC)
cox_models <- list(cox_model1, cox_model2, cox_model3, cox_model4, cox_model5, cox_model6)

time_points <- c(2,43,62,82,107)

dynamic_auc_results <- list()

for (i in 1:length(cox_models)) {
  
  current_model <- cox_models[[i]]
  time_roc <- timeROC(T = final$survival_months, 
                    delta = final$status, 
                    marker = predict(current_model, type = "lp"), 
                    cause = 1,  # Event of interest
                    times = time_points,  # Time points
                    iid = TRUE)

  
  dynamic_auc_results[[paste0("Model_", i)]] <- time_roc$AUC
  
  print(paste("AUC for Model", i, "at time points:", paste(time_points, collapse = ", ")))
  print(round(time_roc$AUC, 3))
}
## [1] "AUC for Model 1 at time points: 2, 43, 62, 82, 107"
##   t=2  t=43  t=62  t=82 t=107 
##    NA 0.746 0.721 0.724    NA 
## [1] "AUC for Model 2 at time points: 2, 43, 62, 82, 107"
##   t=2  t=43  t=62  t=82 t=107 
##    NA 0.700 0.697 0.721    NA 
## [1] "AUC for Model 3 at time points: 2, 43, 62, 82, 107"
##   t=2  t=43  t=62  t=82 t=107 
##    NA 0.702 0.700 0.723    NA 
## [1] "AUC for Model 4 at time points: 2, 43, 62, 82, 107"
##   t=2  t=43  t=62  t=82 t=107 
##    NA 0.703 0.702 0.724    NA 
## [1] "AUC for Model 5 at time points: 2, 43, 62, 82, 107"
##   t=2  t=43  t=62  t=82 t=107 
##    NA 0.698 0.700 0.722    NA 
## [1] "AUC for Model 6 at time points: 2, 43, 62, 82, 107"
##   t=2  t=43  t=62  t=82 t=107 
##    NA 0.696 0.696 0.721    NA

model 4 tends to have a relatively higher AUC throughout different time points

C-index

cox_model1$concordance
##   concordant   discordant       tied.x       tied.y      tied.xy  concordance 
## 3.518110e+05 1.553440e+05 0.000000e+00 2.219000e+03 0.000000e+00 6.936952e-01 
##          std 
## 1.132421e-02
cox_model2$concordance
##   concordant   discordant       tied.x       tied.y      tied.xy  concordance 
## 3.394840e+05 1.676700e+05 1.000000e+00 2.219000e+03 0.000000e+00 6.693900e-01 
##          std 
## 1.156667e-02
cox_model3$concordance
##   concordant   discordant       tied.x       tied.y      tied.xy  concordance 
## 3.405910e+05 1.665630e+05 1.000000e+00 2.219000e+03 0.000000e+00 6.715728e-01 
##          std 
## 1.155125e-02
cox_model4$concordance
##  concordant  discordant      tied.x      tied.y     tied.xy concordance 
## 3.41080e+05 1.66074e+05 1.00000e+00 2.21900e+03 0.00000e+00 6.72537e-01 
##         std 
## 1.15843e-02
cox_model5$concordance
##   concordant   discordant       tied.x       tied.y      tied.xy  concordance 
## 3.394580e+05 1.676950e+05 2.000000e+00 2.219000e+03 0.000000e+00 6.693397e-01 
##          std 
## 1.161257e-02
cox_model6$concordance
##   concordant   discordant       tied.x       tied.y      tied.xy  concordance 
## 3.387630e+05 1.683460e+05 4.600000e+01 2.219000e+03 0.000000e+00 6.680127e-01 
##          std 
## 1.158565e-02

Concordance: model1 69.4 model2 66.9 model3 67.2 model4 67.3 (overall account for meeting the model assumption and the multicollinearity, model 4 have the highest concordance score, although accompanied with “married” variable that is not statistically significant) model5 66.9 model6 66.8

dev_residuals <- residuals(cox_model4, type = "deviance")
plot(dev_residuals, main = "Deviance Residuals", ylab = "Residuals", xlab = "Index")
abline(h = c(-2, 2), col = "red", lty = 2) 

surv_fit <- survfit(Surv(survival_months, status) ~ 1, data = final)

plot(surv_fit, xlab = "Time (months)", ylab = "Survival Probability", 
     main = "Survival Curve for the Final Model", col = "blue", lwd = 2)

grid()